#!/bin/sh
# Use multi-level block permutation to test which behavioral measures achieved predictable 
# accuracies across all subjects (including all ethnicities/races, not only AA and WA).
#
# Author: Jingwei Li

# Physical (pwd -P) path of the directory that contains this script.
DIR=$(
    cd "$(dirname "$0")" >/dev/null 2>&1
    pwd -P
)

# Restricted HCP spreadsheet used when -rstr_csv is not given.
HCP_dir=/mnt/isilon/CSC1/Yeolab/Data/HCP/S1200
rstr_csv="${HCP_dir}/scripts/restricted_hcp_data/RESTRICTED_jingweili_4_12_2017_1200subjects_fill_empty_zygosityGT_by_zygositySR.csv"

# Subject / behavior / colloquial-name lists used when the matching flags
# (-subj_ls, -bhvr_ls, -colloq_ls) are not given.
proj_dir=/home/jingweil/storage/MyProject/fairAI/HCP_race
subj_ls="${proj_dir}/scripts/lists/subjects_wIncome_948.txt"
bhvr_ls="${proj_dir}/scripts/lists/Cognitive_Personality_Task_Social_Emotion_51_matched.txt"
colloq_ls="${proj_dir}/scripts/lists/colloquial_names_51_matched.txt"

# maxKRR_iter can be overridden with -maxKRR_iter; Nperm has no flag.
maxKRR_iter=400
Nperm=1000

#######################################
# Build the MATLAB command that runs HCP_KRR_predictable_behaviors and hand it
# to CBIG_pbsubmit (walltime 06:00:00, 10G memory, 5 CPUs).
# Globals read:    DIR, KRR_dir, maxKRR_iter, Nperm, test_metric, intrim_csv,
#                  outmat, rstr_csv, subj_ls, bhvr_ls, colloq_ls, CBIG_CODE_DIR
# Globals written: cmd, work_dir, jname
# Side effects:    creates $KRR_dir/logs if it is missing; the job's stderr and
#                  stdout files are requested under that directory.
# Returns:         1 if the log directory cannot be created, otherwise the
#                  exit status of CBIG_pbsubmit.
#######################################
main() {
    # MATLAB first changes into fileparts(DIR), i.e. the parent of the script
    # directory (presumably where HCP_addpath lives -- confirm), then runs the test.
    # NOTE(review): the values are pasted into MATLAB single-quoted strings, so a
    # path containing a single quote would break the generated command.
    cmd="matlab -nodesktop -nodisplay -nojvm -r \" cd(fileparts('$DIR')); HCP_addpath; \
        HCP_KRR_predictable_behaviors('$KRR_dir', $maxKRR_iter, $Nperm, '$test_metric', \
        '$intrim_csv', '$outmat', '$rstr_csv', '$subj_ls', '$bhvr_ls', '$colloq_ls'); \
        exit; \" "
    work_dir="$KRR_dir/logs"
    jname=perm_predictability

    # The .err/.out files cannot be written if the log directory is missing.
    if ! mkdir -p "$work_dir"; then
        echo "ERROR: cannot create log directory $work_dir" 1>&2
        return 1
    fi

    # Every expansion is quoted so that paths containing whitespace stay one argument.
    "$CBIG_CODE_DIR/setup/CBIG_pbsubmit" -cmd "$cmd" -walltime 06:00:00 -mem 10G -ncpus 5 \
        -name "$jname" -joberr "$work_dir/$jname.err" -jobout "$work_dir/$jname.out"
}

#############################
# Function usage
#############################
#######################################
# Print the help text to stderr and terminate the script with status 1.
# Globals read: DIR (expanded into the EXAMPLE command line)
# NOTE(review): NAME says HCP_KRR_predictable_behavior.sh while EXAMPLE calls
# HCP_predictable_behavior.sh -- confirm which one matches the file on disk.
#######################################
usage() {
    # Unquoted here-document: $DIR is expanded and each "\\" prints as a single "\".
    cat 1>&2 <<EOF

NAME:
    HCP_KRR_predictable_behavior.sh

DESCRIPTION:
    Use multi-level block permutation to test which behavioral measures achieved predictable
    accuracies across all subjects (including all ethnicities/races, not only AA and WA).
    Multi-level block permutation is performed by the FSL PALM package.

REQUIRED ARGUMENTS:
    -KRR_dir      <KRR_dir>     : The directory which contains the kernel ridge regression
                                  results including the trained models and behavioral
                                  predictions.
    -test_metric  <test_metric> : The accuracy metric used to perform statistical testing.
                                  Choose from 'predictive_COD' and 'corr'.
    -intrim_csv   <intrim_csv>  : Full path of the intermediate csv file generated by
                                  hcp2block2 function (a function inside the PALM package).
    -outmat       <outmat>      : Full path of the output mat file storing the behaviors
                                  whose actual prediction accuracy was significantly above
                                  chance.

OPTIONAL ARGUMENTS:
    -maxKRR_iter  <maxKRR_iter> : Maximal random seed used to split the training-test folds
                                  for performing KRR, e.g. 400.
    -rstr_csv     <rstr_csv>    : The restricted CSV file downloaded from the HCP website.
                                  Default (only for the author's test purpose):
                                  /mnt/isilon/CSC1/Yeolab/Data/HCP/S1200/scripts/restricted_hcp_data/\\
                                  RESTRICTED_jingweili_4_12_2017_1200subjects_fill_empty_zygosityGT_by_zygositySR.csv
    -subj_ls      <subj_ls>     : Subject list (full path). Each line corresponds to one subject.
                                  Default (only for the author's testing purpose):
                                  /home/jingweil/storage/MyProject/fairAI/HCP_race/scripts/lists/\\
                                  subjects_wIncome_948.txt
    -bhvr_ls      <bhvr_ls>     : List of behavioral measures for which matched AA and
                                  matched WA can be found (absolute path).
                                  Default (only for the author's testing purpose):
                                  /home/jingweil/storage/MyProject/fairAI/HCP_race/scripts/lists/\\
                                  Cognitive_Personality_Task_Social_Emotion_51_matched.txt
    -colloq_ls    <colloq_ls>   : List of colloquial names of behavioral variables. The
                                  colloquial names should correspond to the behavioral
                                  names in 'bhvr_ls'.
EXAMPLE:
    $DIR/HCP_predictable_behavior.sh \\
    -KRR_dir '/your/KRR/output/dir/' -maxKRR_iter 400 -test_metric predictive_COD \\
    -intrim_csv '/intermediate/block_design.csv' -outmat '/your/output.mat' \\
    -rstr_csv '/your/HCP_restricted.csv' -subj_ls '/your/subject/list.txt' \\
    -bhvr_ls '/your/matched/behavioral/list.txt' -colloq_ls '/your/colloquial/names/list.txt'

EOF
    exit 1
}

##########################################
# ERROR message
##########################################	
#######################################
# Report a flag that is missing its value and abort the script.
# Arguments: $1 - name of the offending flag (optional)
# Globals:   flag (read) - used only when $1 is absent or empty
# Outputs:   error message on stderr
# Exits:     always, with status 1
#######################################
arg1err() {
    # Prefer the caller-supplied name: after parsing, the global $flag still
    # holds the last flag seen, which need not be the one that is missing.
    echo "ERROR: flag ${1:-$flag} requires one argument" 1>&2
    exit 1
}

##########################################
# Parse Arguments 
##########################################
# Display help message if no argument is supplied
if [ $# -eq 0 ]; then
    # usage writes the help text to stderr and exits with status 1.
    usage
fi

# Consume the command line as "-flag value" pairs. Only POSIX test syntax is
# used here because the script is run by /bin/sh.
while [ $# -gt 0 ]; do
    flag=$1
    shift

    # Reject unknown flags, then insist that a value follows a known flag
    # (otherwise the flag would silently be assigned an empty string).
    case "$flag" in
        -KRR_dir | -maxKRR_iter | -test_metric | -intrim_csv | -outmat | \
        -rstr_csv | -subj_ls | -bhvr_ls | -colloq_ls)
            if [ $# -eq 0 ]; then
                arg1err "$flag"
            fi
            ;;
        *)
            echo "Unknown flag $flag" 1>&2
            usage
            ;;
    esac

    # Store the value in the variable that matches the flag.
    case "$flag" in
        -KRR_dir)     KRR_dir=$1 ;;
        -maxKRR_iter) maxKRR_iter=$1 ;;
        -test_metric) test_metric=$1 ;;
        -intrim_csv)  intrim_csv=$1 ;;
        -outmat)      outmat=$1 ;;
        -rstr_csv)    rstr_csv=$1 ;;
        -subj_ls)     subj_ls=$1 ;;
        -bhvr_ls)     bhvr_ls=$1 ;;
        -colloq_ls)   colloq_ls=$1 ;;
    esac
    shift
done


##########################################
# Check Parameters
##########################################
# Each mandatory input must be non-empty; arg1err is called for the first one
# that is missing.
[ -n "$KRR_dir" ]     || arg1err "-KRR_dir"
[ -n "$test_metric" ] || arg1err "-test_metric"
[ -n "$intrim_csv" ]  || arg1err "-intrim_csv"
[ -n "$outmat" ]      || arg1err "-outmat"

# All required inputs are present: build and submit the job.
main
